package com.gxuwz.crawlers;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small jsoup-based crawler that reads a Baidu Tieba forum listing page
 * and prints the links found in its pager element.
 */
public class AuthorJsoupCrawler {

	/**
	 * Downloads the forum listing page and writes the {@code href} attribute
	 * of every anchor located under the element whose id is
	 * {@code frs_list_pager} to standard output, one per line.
	 *
	 * <p>The {@code href} values are printed exactly as they appear in the
	 * markup. Any exception raised while fetching or parsing is caught here
	 * and its stack trace is printed; nothing is rethrown to the caller.
	 */
	public static void save() {
		final String forumUrl =
				"http://tieba.baidu.com/f?kw=%B1%B1%BE%A9%B3%C7%CA%D0%D1%A7%D4%BA";
		final String browserAgent =
				"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)";
		final int timeoutMillis = 5000;

		try {
			Document page = Jsoup.connect(forumUrl)
					.userAgent(browserAgent)
					.timeout(timeoutMillis)
					.get();

			// An earlier variant of this method selected "a[href]" under
			// "[id=thread_list]" and printed only the hrefs that contain "/p"
			// and do not contain "http" (thread links, with ad links dropped).

			// Pagination links (50 entries per page).
			Elements pagerAnchors = page.select("[id=frs_list_pager]").select("a[href]");
			for (int i = 0; i < pagerAnchors.size(); i++) {
				Element anchor = pagerAnchors.get(i);
				System.out.println(anchor.attr("href"));
			}
		} catch (Exception ex) {
			ex.printStackTrace();
		}
	}
}
